Unit options include: Lossperacre, Lossperclaim, Loss, Acres
#agloss_supervised.R
#creates a map of an unsupervised model of damage cause by county,
#for a select year, commodity, and unit.
#
#unit examples:
#
#Loss
#Lossperacre
#Lossperclaim
#
#Example:
#
#damagetype options: waterscarcity OR waterexcess
#agloss_unsupervised(2014, WHEAT, Lossperacres, waterscarcity)
#access data from nextcloud
#damage <- fread("https://nextcloud.sesync.org/index.php/s/W5kztdb5ZkxRptT/download", header = TRUE)
#access data from local UI server
#damage <- read.csv("/dmine/code/git/dmine-clustering/damage.csv")
#damage <- damage[,3:13]
#access data from google drive
id <- "1RxLEi0DiwMZFIw5CSBaEFjlLG67bKyLV" # google file ID
damage <- fread(sprintf("https://docs.google.com/uc?id=%s&export=download", id), header = TRUE)
damage <- damage[,2:12]
#access data from local sesync server
#setwd("/nfs/soilsesfeedback-data/data/RMA/")
#damage <- read.csv("RMA_damage_combined.csv", strip.white=TRUE)
#used for data from cloud
#colnames(damage) <- c("ID", "Year", "State", "County", "Commodity", "Damagecause", "Loss", "Count", "Acres", "Lossperacre", "Lossperclaim", "Acresperclaim")
#damage$State <- state.name[match(damage$State,state.abb)]
colnames(damage) <- c("Year", "State", "County", "Commodity", "Damagecause", "Loss", "Count", "Acres", "Lossperacre", "Lossperclaim", "Acresperclaim")
damage$State <- state.name[match(damage$State,state.abb)]
library(reshape2) ; damage_loss <- dcast(damage, State + County + Year + Commodity ~ Damagecause, value.var = c(units), sum)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
damage_loss <- subset(damage_loss, Commodity == crops)
damage_loss <- subset(damage_loss, Year == years)
#select only damage causes that are associates with Water Scarcity
damage_loss <- subset(damage_loss, select= waterscarcity)
damage_loss <- transform(damage_loss, ID=paste(State, County, sep="-"))
rownames(damage_loss) <- damage_loss$ID
damage_loss <- damage_loss[,5:length(waterscarcity)]
#remove damage causes that have NO values for ANY counties
damage_loss <- damage_loss[, colSums(damage_loss != 0) > 0]
#identify the optimum number of clusters using NbClust
par(mar=c(1,1,1,1))
library("NbClust")
clust<-NbClust(damage_loss[,1:(length(waterscarcity)-4)], diss=NULL, distance = clustering_distance, min.nc=2, max.nc=20, method = "kmeans", index = "all")
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 3 proposed 2 as the best number of clusters
## * 2 proposed 3 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 8 proposed 8 as the best number of clusters
## * 3 proposed 13 as the best number of clusters
## * 1 proposed 14 as the best number of clusters
## * 1 proposed 17 as the best number of clusters
## * 2 proposed 18 as the best number of clusters
## * 1 proposed 19 as the best number of clusters
## * 1 proposed 20 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 8
##
##
## *******************************************************************
#res$All.index
#res$Best.nc
#res$All.CriticalValues
#res$Best.partition
maxclusters <- length(unique(clust$Best.partition))
km.res <- kmeans(damage_loss, maxclusters, nstart = 25)
pam.res <- pam(damage_loss, maxclusters)
hc.cut <- hcut(damage_loss, k = maxclusters, hc_method = "complete")
library("factoextra")
clusterplot <- fviz_cluster(km.res, data = damage_loss, palette = "Spectral", ellipse.type = "norm", geom = c("point"), main = paste(crops, " ", years, " ", units, sep=""))+
theme_bw()
clusterplot
counties <- readShapePoly('/dmine/data/counties/UScounties_conus.shp',
proj4string=CRS
("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
#--damage_loss
damage_loss_km <- data.frame(km.res$cluster)
damage_loss_km$State <- rownames(damage_loss_km)
cluster <- data.frame(damage_loss_km$km.res.cluster)
colnames(cluster) <- c("cluster")
damage_loss_km <- data.frame(do.call('rbind', strsplit(as.character(damage_loss_km$State),'-',fixed=TRUE)))
damage_loss_km <- cbind(cluster$cluster, damage_loss_km)
colnames(damage_loss_km) <- c("Cluster", "STATE_NAME", "NAME")
m <- merge(counties, damage_loss_km, by=c("STATE_NAME", "NAME"))
pal <- colorFactor(brewer.pal(maxclusters, "Spectral"), na.color = "#ffffff", domain = eval(parse(text=paste("m$", "Cluster", sep=""))))
exte <- as.vector(extent(counties))
label <- paste(sep = "<br/>", m$STATE_NAME, round(eval(parse(text=paste("m$", "Cluster", sep=""))), 0))
markers <- data.frame(label)
labs <- as.list(eval(parse(text=paste("m$", "Cluster", sep=""))))
library(htmltools)
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 16px;
}
"))
title <- tags$div(
tag.map.title, HTML("KMeans Damage Cause Clustering: Water Scarcity")
)
map <- leaflet(data = m) %>% addProviderTiles("Stamen.TonerLite") %>% addControl(title, position = "topleft", className="map-title") %>% fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(color = ~pal(eval(parse(text=paste("m$", "Cluster", sep="")))), popup = markers$label, weight = 1) %>%
addLegend(pal = pal, values = ~na.omit(eval(parse(text=paste("m$", "Cluster", sep="")))), labels = c("1", "2"), opacity = .5, title = paste(years, " ", crops, " ", units, sep=""),
position = "bottomright")
map